function [Record_fitness, theta_opt] = fit_GPR_v_20(y, theta_initial, simPar, is_disp)
%FIT_GPR_V_20 Tune the GPR kernel hyper-parameters by iterative ascent.
%
%   [Record_fitness, theta_opt] = fit_GPR_v_20(y, theta_initial, simPar, is_disp)
%
%   Inputs
%     y             - observation vector; N = length(y) is used throughout.
%     theta_initial - struct with fields v and mu (starting kernel parameters).
%     simPar        - simulation parameter struct. Fields read here:
%                       c, k0, sigma2_n, x, p, xb, pb, N,
%                       channelEstimator.GPR.N_iter,
%                       channelEstimator.GPR.optimizer,
%                       channelEstimator.GPR.mixedKernel,
%                       channelEstimator.GPR.mixKernelNum (mixed kernel only).
%     is_disp       - optional, default false. When true, progress is printed
%                     (not in the single-kernel "Armijo-Goldstein" branch).
%
%   Outputs
%     Record_fitness - N_iter-by-3 matrix. Column 1 holds the fitness value
%                      (third output of forward / forward_mm) per iteration.
%                      Columns 2 and 3 hold the first two outputs of forward
%                      in single-kernel mode and are zero in mixed mode.
%     theta_opt      - single kernel: struct('rho', 'v', 'mu').
%                      mixed kernel : struct with fields kernelParams (struct
%                      array with rho, v, mu) and kernelWeights (row vector).
%
%   Supported optimizers
%     single kernel: "GradientAscent", "Armijo-Goldstein", "Fletcher-Reeves"
%     mixed kernel : "Armijo-Goldstein" (only the kernel weights are updated)
%   Any other value raises error("Method not implemented.").

if ~exist('is_disp', 'var')
    is_disp = false;
end

c           = simPar.c;
k0          = simPar.k0;
sigma2_n    = simPar.sigma2_n;

x           = simPar.x;
p           = simPar.p;
xb          = simPar.xb;
pb          = simPar.pb;
N           = length(y);

v   = theta_initial.v;
mu  = theta_initial.mu;

rho = 2*vecnorm(y)^2/N/(1+sigma2_n);                 % Initial channel power.

N_iter          = simPar.channelEstimator.GPR.N_iter;
Record_fitness  = zeros(N_iter, 3);
alpha           = 3;                                 % Step size (used as-is by Fletcher-Reeves).

% Coarse initial-direction search: correlate the first simPar.N samples with
% a far-field dictionary and pick the strongest direction.
% The result (theta_max) is currently NOT used, because the assignment
%     mu = mu_R * [sin(theta_max); 0; cos(theta_max)];
% is disabled and mu keeps the value from theta_initial.
mu_R = 10;              % NOTE(review): radius for the disabled initialisation; value unexplained in the original.
y1 = y(1:simPar.N);
Nig = 10;               % Number of initial guesses.
mu_directions = asin(linspace(-1+1/Nig, 1-1/Nig, Nig)); %#ok<NASGU>
thetas      = (linspace(-1+1/Nig, 1-1/Nig, Nig));
[A, ~]          = iget_A(simPar.N, thetas);
[~, idx_max]    = max(abs(A'*y1));
theta_max       = asin(thetas(idx_max)); %#ok<NASGU>
Clip = 30;

method = simPar.channelEstimator.GPR.optimizer;
mu_arr = zeros(3, N_iter);      % Trajectory of mu (only saved by the debug block at the end).

if simPar.channelEstimator.GPR.mixedKernel == false
    if method == "GradientAscent"

        % Plain gradient ascent on mu with a fixed step of 0.3.
        for iter = 1:N_iter

            theta = struct('rho', rho, 'v', v, 'mu', mu);       % Set parameters.
            [A, B, ell, d] = forward(x,xb, p,pb, theta, k0, c, sigma2_n, N, y, Clip);

            mu = mu + 0.3*d.d_mu;

            Record_fitness(iter, 1) = ell;
            Record_fitness(iter, 2) = A;
            Record_fitness(iter, 3) = B;

            if is_disp
                fprintf('%d: fitness = %f, mu = [%.4f, %.4f, %.4f], rho = %.4f\n',...
                    iter, ell, mu(1), mu(2), mu(3), rho);
            end

            mu_arr(:, iter) = mu;
        end

    elseif method == "Armijo-Goldstein"
        % Backtracking line search, first along d_mu and then along d_v.
        % NOTE(review): forward is called here with trailing (simPar, true),
        % whereas the other single-kernel branches pass Clip. Confirm which
        % signature forward expects.
        % The step size alpha is shared by both searches and is never reset,
        % so it can only shrink over the iterations.
        kappa = 0.5;    % Backtracking factor.
        alpha = 0.03;   % Learning rate.

        for iter = 1:N_iter
            theta = struct('rho', rho, 'v', v, 'mu', mu);       % Set parameters.
            [A, B, ell1, d1] = forward(x,xb, p,pb, theta, k0, c, sigma2_n, N, y, simPar, true);
            Record_fitness(iter, 1) = ell1;
            Record_fitness(iter, 2) = A;
            Record_fitness(iter, 3) = B;

            % Optimize mu.
            loop = true;
            mu0 = mu;
            while loop
                mu = mu0 + alpha*d1.d_mu;
                theta = struct('rho', rho, 'v', v, 'mu', mu);           % Set parameters.
                [~, ~, ell2, ~] = forward(x,xb, p,pb, theta, k0, c, sigma2_n, N, y, simPar, true);

                Q = ell1 + alpha*norm(d1.d_mu)^2 - norm(alpha*d1.d_mu)^2/alpha;   % Q == ell1 in this case.
                if ell2 <= Q
                    alpha = alpha * kappa;      % No improvement: shrink the step and retry.
                else
                    loop = false;
                end
            end

            % Optimize v (gradient d1.d_v was evaluated before the mu update).
            v0 = v;
            loop = true;
            while loop
                v = v0 + alpha*d1.d_v;
                theta = struct('rho', rho, 'v', v, 'mu', mu);           % Set parameters.
                [~, ~, ell2, ~] = forward(x,xb, p,pb, theta, k0, c, sigma2_n, N, y, simPar, true);

                Q = ell1 + alpha*norm(d1.d_v)^2 - norm(alpha*d1.d_v)^2/alpha;   % Q == ell1 in this case.
                if ell2 <= Q
                    alpha = alpha * kappa;
                else
                    loop = false;
                end
            end
        end

    elseif method == "Fletcher-Reeves"
        % Nonlinear conjugate-gradient ascent on mu with a fixed step alpha.
        theta = struct('rho', rho, 'v', v, 'mu', mu);   % Set parameters.
        [~, ~, ~, d] = forward(x,xb, p,pb, theta, k0, c, sigma2_n, N, y, Clip);
        d_mu = d.d_mu;      % Current search direction.
        r = d_mu;           % Gradient at the previous point.

        for iter = 1:N_iter

            mu = mu + alpha*d_mu;

            theta = struct('rho', rho, 'v', v, 'mu', mu);       % Set parameters.
            [A, B, ell, d] = forward(x,xb, p,pb, theta, k0, c, sigma2_n, N, y, Clip);

            % The 4th output of forward is a struct; the gradient w.r.t. mu
            % is its d_mu field (the original code multiplied the struct).
            rnew = d.d_mu;

            % Fletcher-Reeves coefficient. Falls back to steepest ascent when
            % the previous gradient is exactly zero, to avoid 0/0.
            r_sq = r'*r;
            if r_sq > 0
                beta = (rnew'*rnew)/r_sq;
            else
                beta = 0;
            end
            d_mu = rnew + beta*d_mu;
            r = rnew;       % Becomes the "previous gradient" for the next iteration.

            Record_fitness(iter, 1) = ell;
            Record_fitness(iter, 2) = A;
            Record_fitness(iter, 3) = B;

            if is_disp
                fprintf('%d: fitness = %f, mu = [%.4f, %.4f, %.4f], rho = %.4f\n',...
                    iter, ell, mu(1), mu(2), mu(3), rho);
            end

            mu_arr(:, iter) = mu;
        end

    else
        error("Method not implemented.");

    end

    theta_opt = struct('rho', rho, 'v', v, 'mu', mu);
else        % mixedKernel == true
    if method == "Armijo-Goldstein"
        N_kernels = simPar.channelEstimator.GPR.mixKernelNum;
        alpha_w = 0.01*ones(1, N_kernels);      % Per-kernel step size for the weights.
        kappa = 0.5;                            % Backtracking factor.

        % Pick the velocity direction: evaluate the single-kernel fitness for
        % four candidate v vectors and keep the best one.
        Nig = 4;
        ell_ig = zeros(Nig, 1);
        nig_v = {[0; 0 ; 15],[0; 0; -15],[15; 0; 0],[-15; 0; 0]};
        for idx_guess = 1:Nig
            mu = [0.001; 0; 0.001];
            v = nig_v{idx_guess};

            theta = struct('rho', rho, 'v', v, 'mu', mu);       % Set parameters.
            % Trailing "true" forces the single-kernel model.
            [~, ~, ell_ig(idx_guess), ~] = forward_ell_only(x,xb, p,pb, theta, k0, c, sigma2_n, N, y, simPar, true);
        end

        [~, idx_M] = max(ell_ig);
        v_direc = nig_v{idx_M};

        % Build the kernel dictionary: Num_w centres on an arc of radius 10,
        % each combined with Num_v scaled copies of the selected velocity.
        % NOTE(review): this requires N_kernels to be a multiple of Num_v and
        % Num_w >= 2 (Num_w == 1 divides by zero in theta1); not validated here.
        theta = struct();
        theta.kernelParams = struct('rho', rho, 'v', v, 'mu', mu);
        theta.kernelWeights = 1/N_kernels*ones(1, N_kernels);

        Num_v = 3;
        Num_w = N_kernels/Num_v;
        Set_v = {v_direc,v_direc*0.5,v_direc*0};
        theta1=pi/6+(0:Num_w-1)*2*pi/(Num_w-1)/3;
        for idx = 1:Num_w
            for idx2 = 1:Num_v
                theta.kernelParams((idx2-1)*Num_w+idx).mu = 10*[cos(theta1(idx));0;sin(theta1(idx))];
                theta.kernelParams((idx2-1)*Num_w+idx).v = Set_v{idx2};
                theta.kernelParams((idx2-1)*Num_w+idx).rho = rho;
            end
        end

        % Per-kernel Gram matrices. kernelParams are never modified below
        % (only the weights are), so these are evaluated once instead of on
        % every line-search pass. Re-evaluate them if a parameter update
        % (e.g. the mu line search) is ever re-enabled.
        Ks = cell(N_kernels, 1);
        for idxk = 1:N_kernels
            [Ks{idxk}, ~] = get_K_only(x,x, p,p, theta.kernelParams(idxk), k0, c);
        end

        % Defined up-front so the progress print cannot hit an undefined
        % variable when every step of the first iteration is rejected early.
        ell2 = NaN;

        for iter = 1:N_iter
            thetam = theta;
            [~, ~, ell1, d1] = forward_mm(x,xb, p,pb, theta,thetam, k0, c, sigma2_n, N, y, simPar, false);

            % Update the kernel weights one kernel at a time. All kernels use
            % the same ell1/d1 evaluated at the start of this iteration.
            for idx = 1:N_kernels
                theta0 = theta;     % Fallback if no improving step is found.
                loop = true;
                cnt = 0;
                while loop && cnt <= 10
                    w0 = theta0.kernelWeights;
                    w0(idx) = w0(idx) + alpha_w(idx) * d1.d_w(idx);
                    if w0(idx) < 0
                        % The step would make the weight negative. The
                        % original code retried the identical step until the
                        % pass limit and then restored theta0; nothing changes
                        % between retries, so give up immediately (loop stays
                        % true, which triggers the restore below).
                        break;
                    end

                    % Mixed kernel at the current weights (before this trial step).
                    Kym = imix_gram(Ks, theta.kernelWeights, sigma2_n, N);

                    theta.kernelWeights = w0 / sum(w0);     % Renormalised trial weights.

                    [~, ~, ell2, ~] = forward_ell_only(x,xb, p,pb, theta, k0, c, sigma2_n, N, y, simPar, false);

                    % Mixed kernel at the trial weights.
                    Kymix = imix_gram(Ks, theta.kernelWeights, sigma2_n, N);

                    iKym = inv(Kym);
                    mix_m = Kymix-Kym;

                    % Correction term 2*real(sum(inv(Kym) .* (Kymix - Kym))),
                    % added to both sides of the acceptance test.
                    ls2 = 2*real(iKym(:).'*mix_m(:));

                    Q = ell1+ls2;
                    ell2 = ell2+ls2;
                    if ell2 <= Q
                        alpha_w(idx) = alpha_w(idx) * kappa;    % Reject: shrink this kernel's step.
                    else
                        loop = false;                           % Accept the trial weights.
                    end
                    cnt = cnt + 1;
                end

                if loop
                    theta = theta0; % Do not update if the target value is not improved.
                end
            end

            Record_fitness(iter, 1) = ell1;
            Record_fitness(iter, 2) = 0;
            Record_fitness(iter, 3) = 0;

            if is_disp
                % mu here is the placeholder left over from the initial-guess
                % loop; ell2 is the last evaluated trial value (NaN if none).
                fprintf('%d: fitness = %f, mu = [%.4f, %.4f, %.4f], rho = %.4f\n',...
                    iter, ell2, mu(1), mu(2), mu(3), rho);
            end
        end
    else
        error("Method not implemented.");
    end

    theta_opt = theta;
end

% Debug switch: flip to true to dump the mu trajectory.
if false
    save('mus.mat', 'mu_arr');
    fprintf('File saved.\n');
end

end

function K = imix_gram(Ks, w, sigma2_n, N)
% Return sigma2_n*eye(N) + sum_k w(k)*Ks{k}, accumulated in kernel order.
K = sigma2_n * eye(N);
for k = 1:numel(Ks)
    K = K + w(k)*Ks{k};
end
end

function [A, dA] = iget_A(N_BS, thetas)
%IGET_A Far-field dictionary matrix and its derivative factor.
%   A(:,k)  = exp(1j*pi*thetas(k)*(0:N_BS-1).') / sqrt(N_BS)
%   dA(:,k) = A(:,k) .* (1j*pi*(0:N_BS-1).')
%   Both outputs are N_BS-by-length(thetas).
S   = length(thetas);
seq = (0:N_BS-1).';                              % Element indices as a column.

% One row of per-direction phase rates, expanded against the index column
% (same per-element operation order as a column-by-column loop).
rate = reshape(1j*pi*thetas(1:S), 1, []);
A    = exp(rate .* seq) / sqrt(N_BS);
dA   = A .* (1j*pi*seq);
end